//-------------------------------------------------------------------------------------------------------------------------------
//
//Program to compute Duranton and Overman (2005, 2008) K-densities for industry localization point patterns
//Program also computes some standard CDF for bilateral distance distributions
//
//This  version: February 26, 2014
//First version:     June 23, 2010
//
//NEW (25/07/2010): More optimization on the compute_kdense function, especially the weighted version (33% increase in speed)
//NEW (25/07/2010): Added alloc_array and free_array functions, fixed a tiny memory leak
//NEW (10/09/2010): Fixed a bug in reading in the industry data (read lnfile instead on infile, this could mess up stuff)...
//NEW (10/09/2010): Automated the use of a file for different location universes
//NEW (06/10/2010): Force exact erf computation for small sample sizes (avoids seg_faults in the lookup tables)
//NEW (20/11/2010): Random reshuffling is now done without replacement by using permutation
//NEW (16/12/2010): Added a file addendum procedure to identify small sample sizes (<= 25)
//NEW (29/12/2011): Added a procedure for measuring the randomness of firm exit (NO WEIGHTED PROCEDURE YET)
//					Two files: [1] Location universe includes all the -1, 0 firms (lsize) (only lat, lon; first line = lsize, zsize)
//							   [2] Sample includes all the 0, 1 firms (ssize) (lat, lon, empl; first line = ssize, &thrash, &thrash)
//NEW (18/01/2013): Fixed an issue with the weighted optimal bandwidth... a pain in the a**
//NEW (26/02/2014): Added a procedure to compute CDF's for bilateral distances, without smoothing, both weighted and unweighted
//
//-------------------------------------------------------------------------------------------------------------------------------

#include <iostream>
#include <gsl/gsl_sf_bessel.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_poly.h>
#include <gsl/gsl_complex_math.h>
#include <gsl/gsl_sf.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_statistics_double.h>
#include <gsl/gsl_sort.h>

#include <vector>

#include <string.h>
#include <algorithm>

#define DEG2RAD(x) (x * M_PI) / 180.0;
#define EARTH_RADIUS 6378.137

using namespace std;

enum LOGIC { FALSE = 0, TRUE = 1 };

struct parameters
{
    char temp[40];			//Stores the name of the industry that is currently processed
    bool weight_flag;		//TRUE if weighted, FALSE if unweighted
    bool fast_approx;		//TRUE if fast lookup-table approximation is used, FALSE otherwise
    //NEW_29_12_2011
    bool exit_proc;			//TRUE if computing the firm exit scenario, FALSE for normal K-densities
    //NEW_29_12_2011
    double tot_weight;		//Total weights for the weighting scheme
    double bandw;			//Silverman bandwidth for k-density estimation
    int replic;				//Number of Monte Carlo replications
    int ssize;				//Sample size of the current industry
    int lsize;				//Size of location universe
    //NEW_29_12_2011
    int zsize;				//Number of continuing firms in the base year (for exit procedure)
    //NEW_29_12_2011
    int cut;				//Cutoff distance for kernel density estimation
    int numind;				//Number of industries to be processed
    double q25, q75;		//25 and 75 distance percentiles
    double q25w, q75w;		//25 and 75 distance percentiles, weighted bandwidth
    double p1, p2;			//Elements for the weighted percentiles
    int q1, q2;				//Elements for the weighted percentiles
    double Q1, Q3;			//Weighted first and third quartiles
    //NEW 26_02_2014
    bool CDF_proc;          //TRUE if computing CDF using a non-DO procedure
    //NEW 26_02_2014
};


//These are the parameters for the lookup table of the erf approximation
//The size is set statically before execution for the table to be on the stack, which is faster than heap memory
//WARNING: This may cause a segfault on systems with little stack memory!
#define APPROX_SIZE 500000 //1000000
#define APPROX_GRID 100.0

double erf_table[APPROX_SIZE];


//Kernel density estimations for large distances may yield floating-point underflows
//Underflows will be truncated to 0 and the error handler turned off
gsl_error_handler_t *FP_error_handler = gsl_set_error_handler_off();


//Allocate array

double** alloc_array(int rows, int cols, char *process)
{
    double **array_to_alloc = new double*[rows];
    
    for (int i=0; i < rows; i++)
    {
        array_to_alloc[i] = new double[cols];
        
        if (array_to_alloc[i] == NULL)
        {
            fprintf(stderr, "%s: ERROR - Memory could not be allocated, aborting\n", process);
            return(NULL);
        }
    }
    return(array_to_alloc);
}


//Free array

double** free_array(int rows, double **ptr, char *process)
{
    for (int i = 0; i < rows; i++)
    {
        free(ptr[i]);
    }
    
    free(ptr);
    
    return(NULL); //In order to avoid having "dangling" pointers
}


//Approximation of the erf function using a lookup table between [0, APPROX_SIZE/APPROX_GRID]

void compute_approx()
{
    for (int c = 0; c < APPROX_SIZE; c++)
    {
        erf_table[c] = gsl_sf_erf_Z((double) c / (double) APPROX_GRID);
    }
}


//This function computes the weights for the k-density estimation using a particular weighting scheme

void compute_weights(parameters *params, double disspace[][3], double **weights)
{
    params->tot_weight = 0;
    
    for (int i=0; i < params->ssize; i++)
    {
        for (int j = i + 1; j < params->ssize; j++)
        {
            //Here we use an additive weighting scheme, contrary to Duranton and Overman (2005)
            weights[i][j] = disspace[i][2] + disspace[j][2];
            params->tot_weight += 2.0 * weights[i][j];
        }
    }
    
    return;
}

//TEST
//This function computes the great-circle distances between all observations

void compute_distances(parameters *params, double disspace[][3], double **distab)
{
    double ilat  = 0, ilon  = 0, jlat  = 0, jlon  = 0;
    
    for (int i=0; i < params->ssize; i++)
    {
        for (int j=i+1; j < params->ssize; j++)
        {
            ilat=disspace[i][0];
            jlat=disspace[j][0];
            ilon=disspace[i][1];
            jlon=disspace[j][1];
            
            if ((ilat != jlat) || (ilon != jlon))
            {
                //REPLACE WITH THE CORRESPONDING GSL FUNCTIONS IF NECESSARY...
                distab[i][j] = acos(sin(ilat) * sin(jlat) + cos(fabs(ilon - jlon)) * cos(ilat) * cos(jlat)) * EARTH_RADIUS;
                
                if (gsl_isnan(distab[i][j]))
                {
                    distab[i][j] = 0;
                }
            }
            else
            {
                distab[i][j] = 0;
            }
            
        }
    }
    return;
}


//This function computes the (unweighted) kernel density between [0, cut] and puts it into kdense
//This function has been heavily optimized (approx 96% of the execution time occurs here!)
//Latest version: July 25, 2010

void compute_kdense(parameters *params, double **weights, double **distab, double kdense[][2])
{
    register double eval1, eval2, cumul, temp1, temp2, eval1_approx, eval2_approx;
    
    register double optim_bw = params->bandw;
    
    int FAST = params->fast_approx;
    
    //For small samples, use exact method
    if (params->ssize < 20) {
        FAST = FALSE;
    }
    
    //double *middle = &erf_table[(APPROX_SIZE - 1) / 2];
    
    for (register int z = 0; z < params->cut; z++)
    {
        cumul = 0;
        
        for (register int i = 0; i < params->ssize; i++)
        {
            for (register int j = i+1; j < params->ssize ; j++)
            {
                if (FAST)
                {
                    temp1 = fabs((((double) z - distab[i][j]) / optim_bw) * APPROX_GRID);
                    temp2 = (((double) z + distab[i][j]) / optim_bw) * APPROX_GRID;
                    
                    eval1_approx = (erf_table[(int) temp1 + 1] +  erf_table[(int) temp1]) * 0.5;
                    eval2_approx = (erf_table[(int) temp2 + 1] +  erf_table[(int) temp2]) * 0.5;
                    
                    cumul += eval1_approx;
                    cumul += eval2_approx;
                }
                else
                {
                    eval1 = gsl_sf_erf_Z(((double) z - distab[i][j]) / optim_bw);
                    eval2 = gsl_sf_erf_Z(((double) z + distab[i][j]) / optim_bw);
                    
                    cumul += eval1;
                    cumul += eval2;
                }
            }
        }
        
        kdense[z][0] = cumul * (2.0 / ((params->ssize * (params->ssize - 1)) * optim_bw));
        kdense[z][1] = z;
    }
    
    return;
}


//This function computes the (weighted) kernel density between [0, cut] and puts it into kdense
//This function has been heavily optimized (approx 96% of the execution time occurs here!)
//Latest version: July 25, 2010
//REMARK: Weighted k-densities are approximately 5% slower to compute than unweighted ones

void compute_kdense_w(parameters *params, double **weights, double **distab, double kdense[][2])
{
    register double eval1, eval2, cumul, temp1, temp2, eval1_approx, eval2_approx;
    
    register double optim_bw = params->bandw;
    
    int FAST = params->fast_approx;
    
    //For small samples, use exact method
    if (params->ssize < 20) {
        FAST = FALSE;
    }
    
    for (register int z = 0; z < params->cut; z++)
    {
        cumul = 0;
        
        for (register int i = 0; i < params->ssize; i++)
        {
            for (register int j = i+1; j < params->ssize ; j++)
            {
                if (FAST)
                {
                    temp1 = fabs((((double) z - distab[i][j]) / optim_bw) * APPROX_GRID);
                    temp2 = (((double) z + distab[i][j]) / optim_bw) * APPROX_GRID;
                    
                    eval1_approx = (erf_table[(int) temp1 + 1] +  erf_table[(int) temp1]) * 0.5 * weights[i][j];
                    eval2_approx = (erf_table[(int) temp2 + 1] +  erf_table[(int) temp2]) * 0.5 * weights[i][j];
                    
                    cumul += eval1_approx;
                    cumul += eval2_approx;
                }
                else
                {
                    eval1 = gsl_sf_erf_Z(((double) z - distab[i][j]) / optim_bw) * weights[i][j];
                    eval2 = gsl_sf_erf_Z(((double) z + distab[i][j]) / optim_bw) * weights[i][j];
                    
                    cumul += eval1;
                    cumul += eval2;
                }
            }
        }
        
        kdense[z][0] = cumul * (2.0 / (params->tot_weight * optim_bw));
        kdense[z][1] = z;
    }
    
    return;
}


//Function to compute the CDF (unweighted)
void compute_CDF(parameters *params, double **weights, double **distab, double kdense[][2])
{
    register double cumul;
    
    for (register int z = 0; z < params->cut; z++)
    {
        cumul = 0;
        
        for (register int i = 0; i < params->ssize; i++)
        {
            for (register int j = i+1; j < params->ssize ; j++)
            {
                if (distab[i][j] < z)
                {
                    cumul++;
                }
            }
        }
        
        kdense[z][0] = cumul * (2.0 / ((params->ssize * (params->ssize - 1))));
        kdense[z][1] = z;
    }
    
    return;
}

//Function to compute the CDF (weighted)
void compute_CDF_w(parameters *params, double **weights, double **distab, double kdense[][2])
{
    register double cumul;
    
    for (register int z = 0; z < params->cut; z++)
    {
        cumul = 0;
        
        for (register int i = 0; i < params->ssize; i++)
        {
            for (register int j = i+1; j < params->ssize ; j++)
            {
                if (distab[i][j] < z)
                {
                    cumul += weights[i][j];
                }
            }
        }
        
        kdense[z][0] = cumul * (2.0 / (params->tot_weight));
        kdense[z][1] = z;
    }
    
    return;
}

//Main program

int main(int argc, char *argv[])
{
    
    //Main variables used in the program
    
    parameters params;				//Holds the execution parameters;
    char fpath[255], wpath[255];	//Paths to the industry files, and the warnings log
    char opath[255], dpath[255];	//Paths for writing output (k-dense, conf bands)
    int thrsh = 0;					//Thrashcan
    unsigned long c = 1, t = 0;		//Various counters
    clock_t start, finish;			//Clock to keep track of running time
    double temp1, temp2;			//Temporary variables for weighted bandwidth
    
    //The list of industries to be processed is passed to the program as the first argument argv[1]
    FILE *inlist;
    
    if ((inlist = fopen(argv[1], "r")) == NULL) {
        fprintf(stderr, "%s: ERROR - File %s not found, aborting.\n", argv[0], argv[1]);
        return(1);
    }
    
    //The location universe to be used is passed to the program as the second argument argv[2]
    FILE *lndata;
    
    if ((lndata = fopen(argv[2], "r")) == NULL) {
        fprintf(stderr, "%s: ERROR - File %s not found, aborting.\n", argv[0], argv[2]);
        return(1);
    }
    
    //Output is written to the directory specified as the third argument argv[3], include "/"
    strcpy(opath, argv[3]);
    
    //Determine the cutoff distance for the k-density estimates
    if ((params.cut = atoi(argv[4])) < 1) {
        fprintf(stderr, "%s: ERROR - invalid cutoff distance\n", argv[0]);
        return(2);
    }
    
    //Determine the number of monte-carlo replications to be run
    if ((params.replic = atoi(argv[5])) < 0) {
        fprintf(stderr, "%s: ERROR - invalid number of replications\n", argv[0]);
    }
    else if ((	params.replic = atoi(argv[5])) == 0) {
        fprintf(stderr, "%s: WARNING - no standard K-density mcmc replications will be run\n", argv[0]);
    }
    
    //Determine if a weighted mc procedure is requested (optional argument)
    //argv[6] = 0 -> unweighted (default), argv[6] = 1 -> weighted
    if (argc > 6) {
        params.weight_flag = (bool) atoi(argv[6]);
    }
    else {
        params.weight_flag = FALSE;
    }
    
    if (argc > 7) {
        params.fast_approx = (bool) atoi(argv[7]);
    }
    else {
        params.fast_approx = FALSE;
    }
    
    //NEW_29_12_2011
    if (argc > 8) {
        params.exit_proc = (bool) atoi(argv[8]);
    }
    else {
        params.exit_proc = FALSE;
    }
    //NEW_29_12_2011
    
    
    //NEW_26_02_2014
    if (argc > 9) {
        params.CDF_proc = (bool) atoi(argv[9]);
    }
    else {
        params.CDF_proc = FALSE;
    }
    //NEW_26_02_2014
    
    
    //Get the number of industries to be processed
    fscanf(inlist, "%d", &params.numind);
    
    //Display the parameters chosen to track down errors
    fprintf(stdout, "%s: Number of industries to be processed = %d\n", argv[0], params.numind);
    
    if (!params.weight_flag) {
        fprintf(stdout, "%s: Not using weights in computations\n", argv[0]);
    }
    else {
        fprintf(stdout, "%s: Using weights in compuations\n", argv[0]);
    }
    
    if (!params.fast_approx) {
        fprintf(stdout, "%s: Using exact computations as implemented by gsl_sf_erf_Z\n", argv[0]);
    }
    else {
        fprintf(stdout, "%s: Using fast interpolation approximation of gsl_sf_erf_Z\n", argv[0]);
    }
    
    //NEW_29_12_2011
    if (!params.exit_proc) {
        fprintf(stdout, "%s: Running standard K-density computions\n", argv[0]);
    }
    else {
        fprintf(stdout, "%s: Running K-densities for counterfactual exit\n", argv[0]);
    }
    //NEW_29_12_2011
    
    
    //NEW_26_02_2014
    if (!params.CDF_proc) {
        fprintf(stdout, "%s: Running standard K-density computions\n", argv[0]);
    }
    else {
        fprintf(stdout, "%s: Running CDF computations based on raw data\n", argv[0]);
    }
    //NEW_26_02_2014
    
    
    /*OLD LOC SPACE*/
    
    
    //Set up the fast gsl_sf_erf_Z approximation table
    compute_approx();
    
    //Setup warnings file
    FILE *warn_log;
    
    strcpy(wpath, argv[3]);
    strcat(wpath,"warnings_log.txt");
    
    if ((warn_log = fopen(wpath, "a+")) == NULL) {
        fprintf(stderr, "%s: ERROR - File %s could not be created, aborting.\n", argv[0], argv[1]);
        return(1);
    }
    
    
    //******************************************************************************************
    //STEP 1:
    //This is the main body of the programm - Loop over the different industries to be processed
    //Counter = o_loop (outer loop)
    //******************************************************************************************
    
    
    for (int o_loop = 0; o_loop < params.numind; o_loop++)
    {
        //Automatically read-in the location universe for each industry
        fprintf(stdout, "%s: START\n", argv[0]);
        
        //We now read into memory the data for the location universe
        //lsize is the number of locations from which we are going to sample in the programm
        char toopen[255];
        fscanf(lndata, "%s", toopen);
        fprintf(stdout, "%s: Using location universe %s\n", argv[0], toopen);
        
        FILE *locauni = fopen(toopen, "r");
        
        //fscanf(locauni, "%d %d", &params.lsize, &thrsh);
        //NEW_29_12_2011
        fscanf(locauni, "%d %d", &params.lsize, &params.zsize);
        //NEW_29_12_2011
        
        //Location universe is of lsize observations with two values (lat, lon)
        
        float locspace[params.lsize][2];
        
        t=0;
        
        while (fscanf(locauni, "%f %f", &locspace[t][0], &locspace[t][1]) != EOF)
        {
            //We first have to convert the latitude and the longitude into radians
            locspace[t][0] = DEG2RAD(locspace[t][0]);
            locspace[t][1] = DEG2RAD(locspace[t][1]);
            
            t++;
        }
        
        fclose(locauni);
        
        fprintf(stdout, "%s: Size of sampling location universe = %d\n", argv[0], params.lsize);
        fprintf(stdout, "%s: Running %d sampling replications, cutoff distance = %d\n", argv[0], params.replic, params.cut);
        
        //We want to keep track of execution time for each industry we process
        start = clock();
        
        //Read industry information from the file
        fscanf(inlist, "%s", fpath);
        
        c = 0;
        
        //Retrieve the industry name from the full path, stripping away the path and the file extension
        char *ptr_c = fpath;
        while (*ptr_c++ != '\0') { }
        while (*(--ptr_c) != '/') { }
        ptr_c++;
        while ((params.temp[c++] = *ptr_c++) != '.') { }
        params.temp[--c] = '\0';
        
        
        //Generate output file name for empirical density (dpath), local conf bounds, and global conf bounds (opath)
        //Output is written to the directory specified as the third argument argv[3], including the terminating "/"
        strcpy(dpath, argv[3]);
        strcat(dpath, params.temp);
        
        if (!params.weight_flag)
        {
            if (!params.CDF_proc)
            { strcat(dpath,"_kdense_emp.txt"); }
            else
            { strcat(dpath,"_CDF_emp.txt"); }
        }
        else
        {
            if (!params.CDF_proc)
            { strcat(dpath,"_kdense_emp_W.txt"); }
            else
            { strcat(dpath,"_CDF_emp_W.txt"); }
        }
        
        
        strcpy(opath, argv[3]);
        strcat(opath, params.temp);
        
        if (!params.weight_flag)
        {
            strcat(opath,"_kdense_mcs.txt");
        }
        else
        {
            strcat(opath,"_kdense_mcs_W.txt");
        }
        
        //Read in the data for the industry (latitude, longitude, employment)
        FILE *indata;
        
        if ((indata = fopen(fpath, "r")) == NULL)
        {
            fprintf(stderr, "%s: ERROR - File %s not found, aborting.\n", argv[0], fpath);
            return(1);
        }
        
        //Get industry size
        fscanf(indata, "%d", &params.ssize);
        
        //Write warning flag if small sample size
        if (params.ssize < 25)
        {
            fprintf(warn_log, "%s: %d (small)\n", params.temp, params.ssize);
        }
        else
        {
            fprintf(warn_log, "%s: %d\n", params.temp, params.ssize);
        }
        
        fscanf(indata, "%d", &thrsh); // -> thrashcan
        fscanf(indata, "%d", &thrsh); // -> thrashcan
        
        double disspace[params.ssize][3];
        
        t=0;
        
        while (fscanf(indata, "%lf %lf %lf", &disspace[t][0], &disspace[t][1], &disspace[t][2]) != EOF)
        {
            //We first have to convert the latitude and the longitude into radians
            disspace[t][0] = DEG2RAD(disspace[t][0]);
            disspace[t][1] = DEG2RAD(disspace[t][1]);
            
            t++;
        }
        
        fclose(indata);
        
        fprintf(stdout, "%s: Processing industry %s with %d firms\n", argv[0], params.temp, params.ssize);
        
        if (params.ssize < 20) {
            fprintf(stdout, "%s: Warning -- small samples size, forcing gsl_sf_erf_Z\n", argv[0]);
        }
        
        //Dynamically allocate a two-dimensional array to hold the empirical weights
        /*double **weights = new double*[params.ssize];
         
         for (int i=0; i < params.ssize; i++)
         {
         weights[i] = new double[params.ssize];
         
         if (weights[i] == NULL)
         {
         fprintf(stderr, "%s: ERROR - Memory for weights could not be allocated, aborting\n", argv[0]);
         return(2);
         }
         
         }*/
        
        double **weights = alloc_array(params.ssize, params.ssize, argv[0]);
        
        
        //Compute the weights for the empirical sample, using the function weight_scheme
        //Alternative weighting schemes are possible by modifying this function
        compute_weights(&params, disspace, weights);
        
        //Dynamically allocate a two-dimensional array to hold the empirical distances
        /*double **distab = new double*[params.ssize];
         
         for (int i=0; i < params.ssize; i++)
         {
         distab[i] = new double[params.ssize];
         
         if (distab[i] == NULL)
         {
         fprintf(stderr, "%s: ERROR - Memory for distances could not be allocated, aborting\n", argv[0]);
         return(2);
         }
         
         }*/
        
        double **distab = alloc_array(params.ssize, params.ssize, argv[0]);
        
        //NEW [20/11/2010]
        //Array for random permutations, set up the identity permutation initially
        int permute_space[params.lsize];
        
        for (int i = 0; i < params.lsize; i++)
        {
            permute_space[i] = i;
        }
        //NEW [20/11/2010]
        
        //******************************************************************************************
        //STEP 2:
        //Compute the empirical distances and k-density for the selected industry
        //******************************************************************************************
        
        
        //Compute the empirical distances
        compute_distances(&params, disspace, distab);
        
        double kdense[params.cut][2];
        
        
        //NEW_26_02_2014
        //Compute the raw CDF -- Used as dependent variable in the Statcan paper
        if (params.CDF_proc == TRUE)
        {
            printf("Entering the CDF computations...\n");
            
            if (!params.weight_flag)
            {
                compute_CDF(&params, weights, distab, kdense);
            }
            else
            {
                compute_CDF_w(&params, weights, distab, kdense);
            }
            
            FILE *osdata;
            
            if((osdata = fopen(dpath, "w")) == NULL)
            {
                fprintf(stdout, "%s: ERROR - Output file %s could not be opened, aborting\n", argv[0], dpath);
                return(1);
            }
            
            for (int j=0; j < params.cut; j++)
            {
                fprintf(osdata,"%+.18lf %+.10lf\n", kdense[j][0], kdense[j][1]);
            }
            
            fclose(osdata);
            
            //Go to next industry by skipping the rest of the iteration
            continue;
        }
        //NEW_26_02_2014
        
        //printf("Something wrong with the continue statement in the CDF computations...\n");
        
        
        
        c = 0;
        //unsigned long size = params.ssize * (params.ssize - 1);
        
        //Dynamically allocate a two-dimensional array to hold the empirical distances and their reflection
        double *distemp = new double[(long) params.ssize * (params.ssize - 1)];
        double *distemp2 = new double[(long) params.ssize * (params.ssize - 1)];
        
        //Allocate array to hold the weights
        double *wtemp = new double[(long) params.ssize * (params.ssize - 1)];
        double *wtemp2 = new double[(long) params.ssize * (params.ssize - 1)];
        
        size_t *wpermute = new size_t[(long) params.ssize * (params.ssize - 1)];
        
        if (distemp == NULL)
        {
            fprintf(stderr, "%s: ERROR - Memory for temp distances could not be allocated, aborting\n", argv[0]);
            return(2);
        }
        
        for (int i = 0; i < params.ssize; i++)
        {
            for (int j = i + 1; j < params.ssize; j++, c++)
            {
                distemp[c] = distab[i][j];
                distemp2[c] = distab[i][j];
                wtemp[c] = weights[i][j];
            }
        }
        
        //We reflect the positive distances into negative distances to avoid estimation bias around d = 0
        for (int i = 0; i < params.ssize; i++)
        {
            for (int j = i + 1; j < params.ssize; j++, c++)
            {
                distemp[c] = -distab[i][j];
                distemp2[c] = -distab[i][j];
                wtemp[c] = weights[i][j];
            }
        }
        
        
        //Set bandwidth according to Silverman (1986)
        gsl_sort_index(wpermute, distemp, 1, params.ssize * (params.ssize - 1));
        gsl_sort(distemp, 1, params.ssize * (params.ssize - 1));
        
        for (int i = 0; i < params.ssize * (params.ssize - 1); i++)
        {
            wtemp2[i] = wtemp[wpermute[i]]/params.tot_weight;
        }
        
        //Compute the weighted sample interquartile range
        temp1 = temp2 = 0.0;
        
        for (int i = 0; i < params.ssize * (params.ssize - 1); i++)
        {
            temp1 += wtemp2[i];
            temp2 = temp1 + wtemp2[i+1];
            if (temp1 <= 0.25 && temp2 > 0.25)
            {
                params.q1 = i;
                break;
            }
        }
        
        temp1 = temp2 = 0.0;
        
        for (int i = 0; i < params.ssize * (params.ssize - 1); i++)
        {
            temp1 += wtemp2[i];
            temp2 = temp1 + wtemp2[i+1];
            if (temp1 <= 0.75 && temp2 > 0.75)
            {
                params.q2 = i;
                break;
            }
        }
        
        params.p1 = 0.25; params.p2 = 0.75;
        
        for (int i = 0; i < params.q1; i++)
        {
            params.p1 -= wtemp2[i];
        }
        
        for (int i = 0; i < params.q2; i++)
        {
            params.p2 -= wtemp2[i];
        }
        
        
        params.q75 = gsl_stats_quantile_from_sorted_data(distemp, 1, params.ssize * (params.ssize - 1), 0.75);
        params.q25 = gsl_stats_quantile_from_sorted_data(distemp, 1, params.ssize * (params.ssize - 1), 0.25);
        
        params.Q1 = distemp[params.q1] + params.p1*(distemp[params.q1 + 1] - distemp[params.q1]);
        params.Q3 = distemp[params.q2] + params.p2*(distemp[params.q2 + 1] - distemp[params.q2]);
        
        //Compute weighted quantiles
        
        
        //Determine the optimal bandwidth according to Silverman, depending on whether weights are used or not
        if (!params.weight_flag)
        {
            params.bandw = 1.06 * pow(params.ssize * (params.ssize - 1), -0.2) * gsl_min(gsl_stats_sd(distemp, 1,
                                                                                                      params.ssize * (params.ssize - 1)), (params.q75 - params.q25) / 1.34);
            compute_kdense(&params, weights, distab, kdense);
        }
        else
        {
            //params.bandw = 1.06 *pow(params.tot_weight / 2.0, -0.2) * gsl_min(gsl_stats_sd(distemp, 1,
            //																params.ssize * (params.ssize - 1)), (params.q75 - params.q25) / 1.34);
            
            //FIXED THE WEIGHTED BANDWIDTH SELECTION
            params.bandw = 1.06 *pow(params.ssize * (params.ssize - 1), -0.2) * gsl_min(gsl_stats_wsd(wtemp, 1, distemp2, 1,
                                                                                                      params.ssize * (params.ssize - 1)), (params.Q3 - params.Q1) / 1.34);
            compute_kdense_w(&params, weights, distab, kdense);
        }
        
        fprintf(stdout, "%s: (p25+, p75+) industry distance = (%.4lf, %.4lf)\n", argv[0], params.q25, params.q75); //, params.cut);
        fprintf(stdout, "%s: k-density optimal bandwidth set to %.4lf\n", argv[0], params.bandw);
        
        //Write the empirical kdensity to a file of the form (kdense, dist)
        
        FILE *osdata;
        
        if((osdata = fopen(dpath, "w")) == NULL)
        {
            fprintf(stdout, "%s: ERROR - Output file %s could not be opened, aborting\n", argv[0], dpath);
            return(1);
        }
        
        for (int j=0; j < params.cut; j++)
        {
            fprintf(osdata,"%+.18lf %+.10lf\n", kdense[j][0], kdense[j][1]);
        }
        
        fclose(osdata);
        
        
        //******************************************************************************************
        //STEP 3:
        //Monte-Carlo sampling procedure, consisting in replic times reshuffling of firms in the
        //location universe
        //******************************************************************************************
        
        
        //Space for mc sampling
        //int randnumber; [COMMENTED OUT 20/11/2010]
        double mcsspace[params.ssize][3];
        
        //Needed to display progress report
        int ic1 = 0;
        int step = (int) params.replic / 10;
        
        //Dynamically allocate the array to hold all the k-density estimates
        /*double **kd_array = new double*[params.replic];
         
         for (int i=0; i < params.replic; i++)
         {
         kd_array[i] = new double[params.cut];
         
         if (kd_array[i] == NULL)
         {
         fprintf(stdout, "%s: ERROR - Memory for MC k-density array could not be allocated, aborting\n", argv[0]);
         return(2);
         }
         }*/
        
        double **kd_array = alloc_array(params.replic, params.cut, argv[0]);
        
        //Set up a random generator object of type taus
        gsl_rng *randn = gsl_rng_alloc(gsl_rng_taus);
        
        //Set the generator's random seed (WARNING: USE A DIFFERENT SEED FOR EACH PROCESS, BUT KEEP TRACK)
        //Use system time (current seconds)
        gsl_rng_set(randn, time(0));
        
        //Process 1
        //gsl_rng_set(randn, 0);
        
        //Process 2
        //gsl_rng_set(randn, 10);
        
        //Process 3
        //gsl_rng_set(randn, 20);
        
        //Process 4
        //gsl_rng_set(randn, 30);
        
        //Process 5
        //gsl_rng_set(randn, 40);
        
        //Process 6
        //gsl_rng_set(randn, 50);
        
        
        
        fprintf(stdout, "%s: Starting sampling (progress display step = %d replics)\n", argv[0], step);
        
        
        for (int r = 0; r < params.replic; r++)
        {
            //NEW [20/11/2010]
            gsl_ran_shuffle(randn, permute_space, params.lsize, sizeof (int));
            //NEW [20/11/2010]
            
            //Sample random numbers between 0 and length(location_universe)
            //params.exit_proc = FALSE;
            
            //NEW_29_12_2011
            if (params.exit_proc == FALSE)
            {
                for (int i = 0; i < params.ssize; i++)
                {
                    //Generate random number between [0,1] * size of the location space
                    //randnumber = (int) (gsl_ran_flat(randn, 0, 1) * params.lsize); [COMMENTED OUT 20/11/2010]
                    
                    //Random location latitude
                    //mcsspace[i][0] = locspace[randnumber][0]; [COMMENTED OUT 20/11/2010]
                    mcsspace[i][0] = locspace[permute_space[i]][0];
                    //The reshuffling just picks the first ssize number of random locations from the reshuffled location universe
                    
                    //Random location longitude
                    //mcsspace[i][1] = locspace[randnumber][1]; [COMMENTED OUT 20/11/2010]
                    mcsspace[i][1] = locspace[permute_space[i]][1];
                    
                    //Employment of the randomly reallocated firm
                    //mcsspace[i][2] = disspace[i][2]; [COMMENTED OUT 20/11/2010]
                    mcsspace[i][2] = disspace[i][2];
                }
            }
            else {
                for (int i = 0; i < params.zsize; i++)
                {
                    //Generate random number between [0,1] * size of the location space
                    //randnumber = (int) (gsl_ran_flat(randn, 0, 1) * params.lsize); [COMMENTED OUT 20/11/2010]
                    
                    //Random location latitude
                    //mcsspace[i][0] = locspace[randnumber][0]; [COMMENTED OUT 20/11/2010]
                    mcsspace[i][0] = locspace[permute_space[i]][0];
                    //The reshuffling just picks the first ssize number of random locations from the reshuffled location universe
                    
                    //Random location longitude
                    //mcsspace[i][1] = locspace[randnumber][1]; [COMMENTED OUT 20/11/2010]
                    mcsspace[i][1] = locspace[permute_space[i]][1];
                    
                    //Employment of the randomly reallocated firm
                    //mcsspace[i][2] = disspace[i][2]; [COMMENTED OUT 20/11/2010]
                    mcsspace[i][2] = disspace[i][2];
                }
                for (int i = params.zsize; i < params.ssize; i++)
                {
                    mcsspace[i][0] = locspace[i][0];
                    
                    mcsspace[i][1] = locspace[i][1];
                    
                    mcsspace[i][2] = disspace[i][2];
                }
            }
            //NEW_29_12_2011
            
            
            //Compute the Monte-Carlo distances for the confidence bounds
            compute_distances(&params, mcsspace, distab);
            
            c = 0;
            
            for (int i = 0; i < params.ssize; i++)
            {
                for (int j = i+1; j < params.ssize ; j++, c++)
                {
                    distemp[c] = distab[i][j];
                    distemp2[c] = distab[i][j];
                    wtemp[c] = weights[i][j];
                }
            }
            
            for (int i = 0; i < params.ssize; i++)
            {
                for (int j = i+1; j < params.ssize ; j++, c++)
                {
                    distemp[c] = -distab[i][j];
                    distemp2[c] = -distab[i][j];
                    wtemp[c] = weights[i][j];
                }
            }
            
            
            //Set bandwidth according to Silverman (1986)
            //gsl_sort(distemp, 1, params.ssize * (params.ssize - 1));
            //params.q75 = gsl_stats_quantile_from_sorted_data(distemp, 1, params.ssize * (params.ssize - 1), 0.75);
            //params.q25 = gsl_stats_quantile_from_sorted_data(distemp, 1, params.ssize * (params.ssize - 1), 0.25);
            
            gsl_sort_index(wpermute, distemp, 1, params.ssize * (params.ssize - 1));
            gsl_sort(distemp, 1, params.ssize * (params.ssize - 1));
            
            for (int i = 0; i < params.ssize * (params.ssize - 1); i++)
            {
                wtemp2[i] = wtemp[wpermute[i]]/params.tot_weight;
            }
            
            
            //Compute the weighted sample interquartile range
            temp1 = temp2 = 0.0;
            
            for (int i = 0; i < params.ssize * (params.ssize - 1); i++)
            {
                temp1 += wtemp2[i];
                temp2 = temp1 + wtemp2[i+1];
                if (temp1 <= 0.25 && temp2 > 0.25)
                {
                    params.q1 = i;
                    break;
                }
            }
            
            temp1 = temp2 = 0.0;
            
            for (int i = 0; i < params.ssize * (params.ssize - 1); i++)
            {
                temp1 += wtemp2[i];
                temp2 = temp1 + wtemp2[i+1];
                if (temp1 <= 0.75 && temp2 > 0.75)
                {
                    params.q2 = i;
                    break;
                }
            }
            
            params.p1 = 0.25; params.p2 = 0.75;
            
            for (int i = 0; i < params.q1; i++)
            {
                params.p1 -= wtemp2[i];
            }
            
            for (int i = 0; i < params.q2; i++)
            {
                params.p2 -= wtemp2[i];
            }
            
            
            params.q75 = gsl_stats_quantile_from_sorted_data(distemp, 1, params.ssize * (params.ssize - 1), 0.75);
            params.q25 = gsl_stats_quantile_from_sorted_data(distemp, 1, params.ssize * (params.ssize - 1), 0.25);
            
            params.Q1 = distemp[params.q1] + params.p1*(distemp[params.q1 + 1] - distemp[params.q1]);
            params.Q3 = distemp[params.q2] + params.p2*(distemp[params.q2 + 1] - distemp[params.q2]);
            
            
            //Determine the optimal bandwidth according to Silverman (1986), depending on whether weights are used or not
            if (!params.weight_flag)
            {
                params.bandw = 1.06 * pow(params.ssize * (params.ssize - 1), -0.2) * gsl_min(gsl_stats_sd(distemp,
                                                                                                          1, params.ssize * (params.ssize - 1)), (params.q75 - params.q25) / 1.34);
                compute_kdense(&params, weights, distab, kdense);
            }
            else
            {
                //params.bandw = 1.06 * pow(params.tot_weight / 2.0, -0.2) * gsl_min(gsl_stats_sd(distemp,
                //																1, params.ssize * (params.ssize - 1)), (params.q75 - params.q25) / 1.34);
                
                //FIXED THE WEIGHTED BANDWIDTH SELECTION
                params.bandw = 1.06 *pow(params.ssize * (params.ssize - 1), -0.2) * gsl_min(gsl_stats_wsd(wtemp, 1, distemp2, 1,
                                                                                                          params.ssize * (params.ssize - 1)), (params.Q3 - params.Q1) / 1.34);
                
                compute_kdense_w(&params, weights, distab, kdense);
            }	
            
            //Store away the k-density in the array holding all the k-densities for the various replications
            for (int i=0; i < params.cut; i++)
            {
                kd_array[r][i] = kdense[i][0];
            }
            
            //Display progress report (useful for large samples essentially)
            if (r % step == 0)
            {
                ic1++;
                printf("%s: %2.0f%% done\n", argv[0], ((ic1-1) / 10.0) * 100.0);
            }
            
        }
        
        //Get the 5% lower and 95% upper local bounds, write output to file
        FILE *mcdata;
        
        if((mcdata = fopen(opath, "w")) == NULL) 
        {
            fprintf(stdout, "%s: ERROR - Sample MC output file %s could not be opened, aborting\n", argv[0], opath);
            return(1);
        }
        
        //Variables for the global confidence bounds
        double confbtemp[params.replic];
        double q95 = 0, q95g = 0;
        double q05 = 0, q05g = 0;
        double gtarget = 0.05 / params.cut;
        
        //Holds kdense .1 percentile tables for global bounds
        /*double **gl_array = new double*[100];
         
         for (int i=0; i < 100; i++)
         {
         gl_array[i] = new double[params.cut];
         
         if (gl_array[i] == NULL)
         {
         fprintf(stdout, "%s: ERROR - Memory for MC k-dense array could not be allocated, aborting\n", argv[0]);
         return(2);
         }
         }*/
        
        double **gl_array = alloc_array(100, params.cut, argv[0]);
        
        for (int i=0; i < params.cut; i++)
        {
            for (int j=0; j < params.replic; j++)
            {
                confbtemp[j] = kd_array[j][i];
            }
            
            gsl_sort(confbtemp, 1, params.replic);
            q95 = gsl_stats_quantile_from_sorted_data(confbtemp, 1, params.replic, 0.95);
            q05 = gsl_stats_quantile_from_sorted_data(confbtemp, 1, params.replic, 0.05);
            
            q95g = gsl_stats_quantile_from_sorted_data(confbtemp, 1, params.replic, 1-gtarget);
            q05g = gsl_stats_quantile_from_sorted_data(confbtemp, 1, params.replic, gtarget);
            
            fprintf(mcdata, "%lf %lf %lf %lf %d\n", q95, q05, q95g, q05g, i);
            
            for (int t=0; t < 50; t++)
            {
                double rr = (double) (1000.0 - t) / 1000.0;
                q95g = gsl_stats_quantile_from_sorted_data(confbtemp, 1, params.replic, rr);
                gl_array[t][i] = q95g;
            }
            
            for (int t=50; t < 100; t++)
            {
                double rr = (double) (100 - t) / 1000;
                q05g = gsl_stats_quantile_from_sorted_data(confbtemp, 1, params.replic, rr);
                gl_array[t][i] = q05g;
            }
        }
        
        fprintf(stdout, "%s: Sampling output written to %s\n", argv[0], opath);
        
        fclose(mcdata);
        
        //Free the allocated memory to avoid memory leaks
        
        /*for (int i = 0; i < params.ssize; i++)
         {
         free(distab[i]);
         }
         
         free(distab);*/
        
        distab = free_array(params.ssize, distab, argv[0]);
        
        /*for (int i = 0; i < params.ssize; i++)
         {
         free(weights[i]);
         }
         
         free(weights);*/
        
        weights = free_array(params.ssize, weights, argv[0]);
        
        /*for (int i = 0; i < params.replic; i++)
         {
         free(kd_array[i]);
         }
         
         free(kd_array);*/
        
        kd_array = free_array(params.replic, kd_array, argv[0]);
        
        /*for (int i = 0; i < 100; i++)
         {
         free(gl_array[i]);
         }
         
         free(gl_array);*/
        
        gl_array = free_array(100, gl_array, argv[0]);
        
        free(distemp);
        free(distemp2);
        free(wtemp);
        free(wtemp2);
        free(wpermute);
        
        gsl_rng_free(randn);
        
        finish = clock();
        
        fprintf(stdout, "%s: Total time for processing %s = %d seconds\n", argv[0], params.temp, (int) ((finish - start) / CLOCKS_PER_SEC));
    }
    
    fclose(inlist);
    fclose(warn_log);
    
    fprintf(stdout, "%s: END\n", argv[0]);
    
    return(0);
}
